{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Classifying Password Strength with Machine Learning\n",
    "+ "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# EDA Pkgs\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# ML Pkgs\n",
    "from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Dataset\n",
    "df = pd.read_csv(\"cleanpasswordlist.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>password</th>\n",
       "      <th>strength</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>kzde5577</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>kino3434</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>visi7k1yr</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>megzy123</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>lamborghin1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0     password  strength\n",
       "0           0     kzde5577         1\n",
       "1           1     kino3434         1\n",
       "2           2    visi7k1yr         1\n",
       "3           3     megzy123         1\n",
       "4           4  lamborghin1         1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df[['password', 'strength']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(669425, 2)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# EDA\n",
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>strength</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>669425.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>0.990217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>0.507936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            strength\n",
       "count  669425.000000\n",
       "mean        0.990217\n",
       "std         0.507936\n",
       "min         0.000000\n",
       "25%         1.000000\n",
       "50%         1.000000\n",
       "75%         1.000000\n",
       "max         2.000000"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['password', 'strength'], dtype='object')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "password    1\n",
       "strength    0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check For missing values\n",
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    496650\n",
       "0     89662\n",
       "2     83113\n",
       "Name: strength, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Check Value Counts for Strength\n",
    "df['strength'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x7f8449d41710>"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAD1CAYAAAClSgmzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAPmUlEQVR4nO3df6zddX3H8efLVhyZUxC6hrXFktjFVBdRG6hxfzjISlGz8ocayLI2pLF/CJvGJbPuHzIdC/4zJomSNaOxmE1s2AyNVrqmSpZlAXqZDCyM9Q5htEGobYURo6743h/303m8O597TxHOuaXPR3Jyvt/35/P9fj43pz2v+/1xzk1VIUnSMK+Z9AQkSQuXISFJ6jIkJEldhoQkqcuQkCR1GRKSpK7Fk57Ay+3888+vlStXTnoaknRaeeCBB35QVUtm1191IbFy5UqmpqYmPQ1JOq0keXJY3dNNkqQuQ0KS1GVISJK6RgqJJE8keTjJg0mmWu1NSfYmOdiez231JLklyXSSh5K8a2A/m1r/g0k2DdTf3fY/3bbNXGNIksbjVI4kfqeqLq6qNW19K7CvqlYB+9o6wJXAqvbYAtwKM2/4wA3ApcAlwA0Db/q3Ah8d2G79PGNIksbglzndtAHY0ZZ3AFcN1G+vGfcC5yS5ALgC2FtVx6rqOLAXWN/a3lBV99bMV9LePmtfw8aQJI3BqCFRwD8meSDJllZbWlVPt+XvA0vb8jLgqYFtD7XaXPVDQ+pzjSFJGoNRPyfx21V1OMmvA3uT/PtgY1VVklf0D1PMNUYLri0AF1544Ss5DUk6o4wUElV1uD0/m+RrzFxTeCbJBVX1dDtl9GzrfhhYMbD58lY7DLxvVv2eVl8+pD9zjDF7ftuAbQBr1qwZ619RWrn1G+McbuyeuOkDk56CpAma93RTkl9N8msnl4F1wHeBXcDJO5Q2AXe15V3AxnaX01rguXbKaA+wLsm57YL1OmBPa3s+ydp2V9PGWfsaNoYkaQxGOZJYCnyt3ZW6GPi7qro7yX5gZ5LNwJPAR1r/3cD7gWngR8C1AFV1LMlngf2t32eq6lhb/hjwJeBs4JvtAXBTZwxJ0hjMGxJV9TjwjiH1o8DlQ+oFXNfZ13Zg+5D6FPD2UceQJI2Hn7iWJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1jRwSSRYl+U6Sr7f1i5Lcl2Q6yVeTnNXqr2vr06195cA+Pt3qjyW5YqC+vtWmk2wdqA8dQ5I0HqdyJPFx4NGB9c8BN1fVW4DjwOZW3wwcb/WbWz+SrAauBt4GrAe+2IJnEfAF4EpgNXBN6zvXGJKkMRgpJJIsBz4A/E1bD3AZcGfrsgO4qi1vaOu09stb/w3AHVX1k6r6HjANXNIe01X1eFX9FLgD2DDPGJKkMRj1SOKvgD8BftbWzwN+WFUn2vohYFlbXgY8BdDan2v9/68+a5tefa4xJEljMG9IJPkg8GxVPTCG+bwkSbYkmUoydeTIkUlPR5JeNUY5kngv8HtJnmDmVNBlwOeBc5Isbn2WA4fb8mFgBUBrfyNwdLA+a5te/egcY/yCqtpWVWuqas2SJUtG+JEkSaOYNySq6tNVtbyqVjJz4flbVfX7wLeBD7Vum4C72vKutk5r/1ZVVatf3e5+ughYBdwP7AdWtTuZzmpj7Grb9MaQJI3BL/M5iU8Bn0wyzcz1g9ta/TbgvFb/JLAVoKoOADuBR4C7geuq6sV2zeF6YA8zd0/tbH3nGkOSNAaL5+/yc1V1D3BPW36cmTuTZvf5MfDhzvY3AjcOqe8Gdg+pDx1DkjQefuJaktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUNW9IJPmVJPcn+bckB5L8WatflOS+JNNJvprkrFZ/XVufbu0rB/b16VZ/LMkVA/X1rTadZOtAfegYkqTxGOVI4ifAZVX1DuBiYH2StcDngJur6i3AcWBz678ZON7qN7d+JFkNXA28DVgPfDHJoiSLgC8AVwKrgWtaX+YYQ5I0BvOGRM14oa2+tj0KuAy4s9V3AFe15Q1tndZ+eZK0+h1V9ZOq+h4wDVzSHtNV9XhV/RS4A9jQtumNIUkag5GuSbTf+B8EngX2Av8J/LCqTrQuh4BlbXkZ8BRAa38OOG+wPmubXv28OcaYPb8tSaaSTB05cmSUH0mSNIKRQqKqXqyqi4HlzPzm/9ZXdFanqKq2VdWaqlqzZMmSSU9Hkl41Tunupqr6IfBt4D3AOUkWt6blwOG2fBhYAdDa3wgcHazP2qZXPzrHGJKkMRjl7qYlSc5py2cDvws8ykxYfKh12wTc1ZZ3tXVa+7eqqlr96nb300XAKuB+YD+wqt3JdBYzF7d3tW16Y0iSxmDx/F24ANjR7kJ6DbCzqr6e5BHgjiR/DnwHuK31vw34cpJp4Bgzb/pU1YEkO4FHgBPAdVX1IkCS64E9wCJge1UdaPv6VGcMSdIYzBsSVfUQ8M4h9ceZuT4xu/5j4MOdfd0I3DikvhvYPeoYkqTx8BPXkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHXNGxJJViT5dpJHkhxI8vFWf1OSvUkOtudzWz1JbkkyneShJO8a2Nem1v9gkk0D9Xcnebhtc0uSzDWGJGk8RjmSOAH8cVWtBtYC1yVZDWwF9lXVKmBfWwe4EljVHluAW2HmDR+4AbgUuAS4YeBN/1bgowPbrW/13hiSpDGYNySq6umq+te2/N/Ao8AyYAOwo3XbAVzVljcAt9eMe4FzklwAXAHsrapjVXUc2Ausb21vqKp7q6qA22fta9gYkqQxOKVrEklWAu8E7gOWVtXTren7wNK2vAx4amCzQ602V/3QkDpzjCFJGoORQyLJ64G/Bz5RVc8PtrUjgHqZ5/YL5hojyZYkU0mmjhw58kpOQ5LOKCOFRJLXMhMQf1tV/9DKz7RTRbTnZ1v9MLBiYPPlrTZXffmQ+lxj/IKq2lZVa6pqzZIlS0b5kSRJIxjl7qYAtwGPVtVfDjTtAk7eobQJuGugvrHd5bQWeK6dMtoDrEtybrtgvQ7Y09qeT7K2jbVx1r6GjSFJGoPFI/R5L/AHwMNJHmy1PwVuAnYm2Qw8CXykte0G3g9MAz8CrgWoqmNJPgvsb/0+U1XH2vLHgC8BZwPfbA/mGEOSNAbzhkRV/TOQTvPlQ/oXcF1nX9uB7UPqU8Dbh9SPDhtDkjQefuJaktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUNW9IJNme5Nkk3x2ovSnJ3iQH2/O5rZ4ktySZTvJQkncNbLOp9T+YZNNA/d1JHm7b3JIkc40hSRqfUY4kvgSsn1XbCuyrqlXAvrYOcCWwqj22ALfCzBs+cANwKXAJcMPAm/6twEcHtls/zxiSpDGZNySq6p+AY7PKG4AdbXkHcNVA/faacS9wTpILgCuAvVV1rKqOA3uB9a3tDVV1b1UVcPusfQ0bQ5I0Ji/1msTSqnq6LX8fWNqWlwFPDfQ71Gpz1Q8Nqc81hiRpTH7pC9ftCKBehrm85DGSbEkylWTqyJEjr+RUJOmM8lJD4pl2qoj2/GyrHwZWDPRb3mpz1ZcPqc81xv9TVduqak1VrVmyZMlL/JEkSbO91JDYBZy8Q2kTcNdAfWO7y2kt8Fw7ZbQHWJfk3HbBeh2wp7U9n2Rtu6tp46x9DRtDkjQmi+frkOQrwPuA85McYuYupZuAnUk2A08CH2nddwPvB6aBHwHXAlTVsSSfBfa3fp+pqpMXwz/GzB1UZwPfbA/mGEOSNCbzhkRVXdNpunxI3wKu6+xnO7B9SH0KePuQ+tFhY0gvl5VbvzHpKbyinrjpA5Oegl4F/MS1JKnLkJAkdc17ukmSFiJPF46HRxKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXYaEJKnLkJAkdRkSkqQuQ0KS1GVISJK6DAlJUpchIUnqMiQkSV2GhCSpy5CQJHUZEpKkLkNCktRlSEiSugwJSVKXISFJ6jIkJEldhoQkqcuQkCR1GRKSpC5DQpLUZUhIkroMCUlSlyEhSeoyJCRJXQs+JJKsT/JYkukkWyc9H0k6kyzokEiyCPgCcCWwGrgmyerJzkqSzhwLOiSAS4Dpqnq8qn4K3AFsmPCcJOmMsXjSE5jHMuCpgfVDwKWzOyXZAmxpqy8keWwMc5uU84EfjGuwfG5cI50RfO1Ob6/21+/Nw4oLPSRGUlXbgG2Tnsc4JJmqqjWTnodOna/d6e1Mff0W+ummw8CKgfXlrSZJGoOFHhL7gVVJLkpyFnA1sGvCc5KkM8aCPt1UVSeSXA/sARYB26vqwISnNWlnxGm1Vylfu9PbGfn6paomPQdJ0gK10E83SZImyJCQJHUZEpKkrgV94Vo6nSV5KzPfELCslQ4Du6rq0cnNSqNqr98y4L6qemGgvr6q7p7czMbLI4nTVJJrJz0H9SX5FDNfIxPg/vYI8BW/qHLhS/JHwF3AHwLfTTL4dUB/MZlZTYZ3N52mkvxXVV046XlouCT/Abytqv5nVv0s4EBVrZrMzDSKJA8D76mqF5KsBO4EvlxVn0/ynap650QnOEaeblrAkjzUawKWjnMuOmU/A34DeHJW/YLWpoXtNSdPMVXVE0neB9yZ5M3M/P87YxgSC9tS4Arg+Kx6gH8Z/3R0Cj4B7EtykJ9/SeWFwFuA6yc2K43qmSQXV9WDAO2I4oPAduC3Jju18TIkFravA68/+Q91UJJ7xj8djaqq7k7ym8x83f3ghev9VfXi5GamEW0ETgwWquoEsDHJX09mSpPhNQlJUpd3N0mSugwJSVKXISFJ6jIkJEldhoQkqet/AVSCt7ZjejZrAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "df['strength'].value_counts().plot(kind='bar')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26951"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Longest Password\n",
    "df['password'].str.len().idxmax()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "password    In0LnUoff8wfayJGqzelyDqg4AMl9gBhgl3T2iZeONzh5g...\n",
       "strength                                                    2\n",
       "Name: 26951, dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.iloc[26951]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "459739"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['password'].str.len().idxmin()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['password'] = df['password'].fillna('unknown')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "xfeatures = df['password']\n",
    "ylabels = df['strength']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0             kzde5577\n",
       "1             kino3434\n",
       "2            visi7k1yr\n",
       "3             megzy123\n",
       "4          lamborghin1\n",
       "              ...     \n",
       "669420      10redtux10\n",
       "669421       infrared1\n",
       "669422    184520socram\n",
       "669423       marken22a\n",
       "669424        fxx4pw4g\n",
       "Name: password, Length: 669425, dtype: object"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['password']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "vectorizer = CountVectorizer()\n",
    "X = vectorizer.fit_transform(xfeatures)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split Data\n",
    "X_train,X_test,y_train,y_test = train_test_split(X,ylabels,test_size=0.3,random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model Building"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.7/dist-packages/sklearn/linear_model/_logistic.py:940: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
      "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
      "\n",
      "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
      "    https://scikit-learn.org/stable/modules/preprocessing.html\n",
      "Please also refer to the documentation for alternative solver options:\n",
      "    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
      "  extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
       "                   intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
       "                   multi_class='auto', n_jobs=None, penalty='l2',\n",
       "                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,\n",
       "                   warm_start=False)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "logit = LogisticRegression()\n",
    "logit.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy : 0.7444679028820682\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy :\",logit.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 1 1 1 1 1 1 1]\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "X_predict1 = ['password',\n",
    "             'pYthonqwas'\n",
    "             'faizanahmad',\n",
    "             'password##',\n",
    "             'ajd1348#28t**',\n",
    "             'ffffffffff',\n",
    "             'kuiqwasdi',\n",
    "             '123456',\n",
    "             'abcdef']\n",
    "\n",
    "X_predict1 = vectorizer.transform(X_predict1)\n",
    "y_Predict1 = logit.predict(X_predict1)\n",
    "print(y_Predict1)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "penalty='l2',multi_class='ovr')\t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import joblib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "logit_model = open(\"logit_pswd_model.pkl\",\"wb\")\n",
    "joblib.dump(logit,logit_model)\n",
    "logit_model.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "pswd_cv = open(\"pswd_cv.pkl\",\"wb\")\n",
    "joblib.dump(vectorizer,pswd_cv)\n",
    "pswd_cv.close()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.naive_bayes import MultinomialNB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "nv = MultinomialNB()\n",
    "nv.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy : 0.7445425936622383\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy :\",nv.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "X_predict1 = ['password',\n",
    "             'pYthonqwas'\n",
    "             'faizanahmad',\n",
    "             'password##',\n",
    "             'ajd1348#28t**',\n",
    "             'ffffffffff',\n",
    "             'kuiqwasdi',\n",
    "             '123456',\n",
    "             'abcdef']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_predict2 = vectorizer.transform(X_predict1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 1 1 1 1 1 1 1]\n"
     ]
    }
   ],
   "source": [
    "y_Predict2 = nv.predict(X_predict2)\n",
    "print(y_Predict2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "nv_model = open(\"nv_pswd_model.pkl\",\"wb\")\n",
    "joblib.dump(nv,nv_model)\n",
    "nv_model.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
